// ============================================================================
//
// This shader is derived from one by Z22 that "sliced" an image according to
// its brightness, stacking the slices with varying offsets, to create one of
// a stereo pair of images from a 2D image.
//
// This version creates both left and right images of the stereo pair using a
// single shader rather than having to use two and allows various  parameters
// controlling the process to be specified in the scene file rather than hard
// coding them here.
//
// Note,  the input to this shader for processing should comprise a centrally
// located image surrounded by a large transparent area.  Although  iStripper
// clips, in the form of clipSprites, are not of this form,  being cropped to
// a minimal sized rectangle containing the image,  they are easily converted
// to that form by rendering them into a suitably sized framebuffer. Refer to
// the accompanying scene files for examples of how to do this.
//
// This shader uses items from ThEmuLib but is not a part of that library. At
// some time in the future I may add a similar shader to that library.
//
// TheEmu.
//
// Note, this is still an early version of the shader and some of its details
// may change significantly, in particular the default values for its control
// parameters may change following tuning and extra control parameters may be
// added. And, of course, there may be a bug or two that may need fixing. You
// may use it however you want, but as this is a preliminary version anything
// may change and you should not rely on any continuity being maintained with
// any later versions until at least version 1.0.0 is released.
//
// ============================================================================
// ==                                                                        ==
// == Update history:                                                        ==
// ==                                                                        ==
// ==   2017/01/07 - v0.0.0 - Preliminary version - see warning note above.  ==
// ==   2017/01/08 - v0.0.1 - Preliminary version - see warning note above.  ==
// ==   2017/01/09 - v0.0.2 - Preliminary version - see warning note above.  ==
// ==   2017/01/10 - v0.0.3 - Preliminary version - see warning note above.  ==
// ==   2017/01/11 - v0.0.4 - Preliminary version - see warning note above.  ==
// ==   2017/01/12 - v0.0.5 - Preliminary version - see warning note above.  ==
// ==   2017/01/14 - v0.0.6 - Preliminary version - see warning note above.  ==
// ==   2017/01/15 - v0.0.7a- Preliminary version - see warning note above.  ==
// ==   2017/01/16 - v0.0.8 - Preliminary version - see warning note above.  ==
// ==                                                                        ==
// ============================================================================

// ============================================================================
// == Primary shader tailoring.                                              ==
// ============================================================================

// This shader can be tailored to use any number of slices, up to a predefined
// maximum, simply by modifying the value defined for the following symbol. At
// present the absolute maximum value for this parameter is 64, and this limit
// can only be raised by changing the main body of the code.
//
// Several predefined versions of this shader differing only in the value used
// for NUMBER_OF_SLICES will be delivered but users may add others should they
// need a version with a particular value that is not one of those delivered.
//
// If a user changes NUMBER_OF_SLICES the name of the shader should be changed
// to match. If any other changes are made then please change the name so that
// it can be easily distinguished from the basic delivered version.

// Must be at least 1 as main() divides by it. main() only contains code for
// slice indices 0 to 63 so values above 64 add no further slices.
#define NUMBER_OF_SLICES 12 // Should agree with number in this file's name.

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

// This shader has some built in debugging facilities (see below for details).
// This debugging is enabled whenever ENABLE_DEBUGGING is defined which can be
// done by uncommenting the following statement. The value used for the symbol
// ENABLE_DEBUGGING is irrelevant, only whether or not it is defined.

// #define ENABLE_DEBUGGING // Enable debugging if not commented out.

// ============================================================================
// == Standard shader inputs ==================================================
// ============================================================================

uniform vec2 u_WindowSize; // Window dimensions in pixels (used to normalise gl_FragCoord)
uniform sampler2D Image;   // The image to be manipulated (sampled using texture2D)
uniform float u_Elapsed;   // Elapsed time in seconds (only used to blink the debug colours)

// ============================================================================
//
// The GLSL shader language currently provides no mechanism for importing  any
// elements that are defined in other modules, not even C's crude source level
// #include mechanism. In the absence of anything better TheEmuLib handles any
// imports by manually copying relevant utility code snippets from the sources
// in the Shader Lib.Inc directory. This is very crude but I have attempted to
// be systematic in the way in which this is presented in the library sources.
//
// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =

// Macros from TheEmuLib.Emu_Common_Utilities.lib.src

// EMU_DEFAULT ( type, x, default_value ) evaluates to default_value when x
// compares equal to type(0.0) and to x otherwise. It is used to give default
// values to the optional uniforms declared below, on the assumption that any
// uniform that is not set by the scene file reads as zero. A consequence of
// this is that a value of exactly zero can not be distinguished from "not
// specified" and so will also be replaced by the default value.
//
// Both uses of x are parenthesised so that the macro remains correct when
// its argument is an expression rather than a simple name.

#define EMU_DEFAULT(type,x,default_value) ( ((x)==type(0.0)) ? (default_value) : (x) )

// ============================================================================
// == Shader specific inputs ==================================================
// ============================================================================

// Fraction of the window dimensions by which the two images are to be offset
// from the centre of the window. Normally only the X component should be set
// to a non-zero value to give horizontally separated images but you can also
// generate vertical or even diagonal separation by using the Y component. If
// no image offset is specified then the default, 0.2 in X and 0.0 in Y, will
// shift the two images by 0.2 of the window width in opposite directions and
// so separate their centres by 0.4 of the window width.  You can adjust this
// parameter by using a uniform: clause in the .scn file of the form
//
//     uniform: Emu_3D_Stereo_Offset, vec2, x, y
//
// where x and y are floating point values in the range 0.0 to 1.0. When this
// is used the value for y will normally be 0.0 so that there is a horizontal
// stereo offset but no vertical offset.
//
// Note, an offset of exactly (0.0,0.0) can not be requested because that is
// the value that EMU_DEFAULT treats as meaning "not specified".

uniform vec2 Emu_3D_Stereo_Offset;
vec2 Stereo_Offset = EMU_DEFAULT ( vec2, Emu_3D_Stereo_Offset, vec2(0.2,0.0) );

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

// Parameters controlling the amount by which slices are to be offset. The max
// parallax offset,  Emu_3D_Max_Parallax,  specifies how much the top slice of
// the stack is displaced relative to the bottom slice which is not shifted. A
// clause of the form
//
//   uniform : Emu_3D_Max_Parallax, vec2, x, y
//
// where x and y are floating point values may be used to adjust  this  aspect
// of the shader. When this is used the value for y will normally be 0.0 which
// produces shifts with horizontal but no vertical components.
//
// Note,  the value of Max_Parallax controls the amount by which each slice in
// the stack is shifted relative to each other, if it is large then the shifts
// are large and may result in gaps appearing in the image whenever slices are
// shifted relative to their neighbours by more than their own local width. It
// is most noticeable when it involves the bottom layer to leave a transparent
// area between the slices.  The effect is lower when there are a large number
// of slices and higher when there are few.
//
// Note, a parallax of exactly (0.0,0.0) can not be requested because that is
// the value that EMU_DEFAULT treats as meaning "not specified".

#define MAX_PARALLAX vec2(0.005,0.0) // Default to 0.5% of the window width.

uniform vec2 Emu_3D_Max_Parallax;
vec2 Max_Parallax = EMU_DEFAULT ( vec2, Emu_3D_Max_Parallax, MAX_PARALLAX );

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

// By default the image pair will be displayed with a fully transparent black
// background. This may be overridden by using a clause of the form
//
//   uniform : Emu_3D_BG_rgba, vec4, r, g, b, a
//
// where r, g, b and a are floating point values specifying the values to use
// for the red, green, blue and alpha channels of the background.  Each of r,
// g, b and a should be in the range 0.0 to 1.0 inclusive. The default is for
// all four components to be zero, i.e. fully transparent black.
//
// As the default is itself zero the use of EMU_DEFAULT here has no effect on
// the value, it is used only for consistency with the other parameters.

uniform vec4 Emu_3D_BG_rgba;
vec4 BG_rgba = EMU_DEFAULT ( vec4, Emu_3D_BG_rgba, vec4(0.0) );

// ============================================================================
//
// A note on the shader's implementation.
//
// This shader has been implemented such that
//
// 1) It is a simple matter to tailor it to support any number of 'slices'.
//
// 2) As much as possible the processing of individual slices can proceed in
// parallel.
//
// To support the first of these considerations preprocessor macros are  used
// such that it is only necessary to specify the number of slices that are to
// be used by defining the preprocessor symbol NUMBER_OF_SLICES at the  start
// of the shader (see above) and the rest of the shader automatically adjusts
// to that value. This is achieved by making any code that references a slice
// be conditional on that slice's index, i.e. its position in the stack. This
// is done using the DO_IF macro described below. An important aspect of this
// is that the slice index used as the first argument to that macro is always
// a compile time constant which allows the test to be optimised away and the
// statement that it controls either to be unconditionally executed or to  be
// eliminated from the generated code.
// 
// There is a predefined upper limit for NUMBER_OF_SLICES but should there be
// a need to exceed the current limit it would be a simple matter  to  modify
// the shader body to accommodate a larger range by a factor of 2, 4 or 8, or
// indeed by any small power of two.
//
// To support the second consideration the code has been written such that as
// much of it as possible can be executed in parallel,  with  the  processing
// required for each slice being independent of that of the other slices. The
// overall structure of the shader is thus
//
//    Step 1 - Initialise
//    Step 2 - For each slice: sample the input image
//    Step 3 - Calculate D the "depth" parameter
//    Step 4 - For each slice: determine if current pixel is part of it
//    Step 4a - Optionally generate debug information
//    Step 5 - Flatten the stack of slices
//    Step 6 - Output the final result
//
// Of these steps 1, 3 and 6 are trivial while steps 2 and 4 operate on  each
// slice separately and the code for each slice can run in parallel with that
// for all other slices. Step 5 is a little more complex as the result of the
// flattening depends on all the slices, however it is done in a small number
// of stages first by flattening pairs of neighbouring slices then flattening
// the results of pairs of the outputs of the first stage etc.  At each stage
// the number of slices is reduced by a factor of two and at each  stage  the
// operations on pairs of slices can run in parallel.  As a result although a
// lot of work is done by this shader it is not slow.  In  order to emphasise
// the parallelism I have written operations that can run in parallel on  the
// same line whenever there is enough space to do so and have arranged  these
// in blocks if there is not enough space to fit everything on a single line.
//
// Step 4a above is normally totally suppressed,  but even it is written such
// that it can be executed in parallel for each of the "debug slices".
//
// It may also be noted that although at first glance the mechanisms used for
// this shader might only work when NUMBER_OF_SLICES is a power of  two  this 
// is not the case. The code has been designed such that it will work for any
// number of slices.

// ============================================================================

// The main body of this shader has been written to permit it to be configured
// for any number of slices, up to a fixed limit, simply by changing the value
// for NUMBER_OF_SLICES which is defined above. The following macro is used to
// make operations on a slice conditional on whether or not that slice exists,
// i.e. if its index, N, is in the range 0 to NUMBER_OF_SLICES-1 inclusive. At
// first sight this looks as if it adds the overhead of an if statement but as
// the values used for N when invoking this macro are compile time  constants
// the shader compiler should optimise away the test and thus either emit code
// to unconditionally execute the specified statement, STMT, or emit  no  code
// at all.
//
// Note,  the comparison has to be a strict one.  If an index that is equal to
// NUMBER_OF_SLICES were to be accepted then the spare array element that main
// reserves for out of range indices would be treated as if it were a genuine
// extra slice and would be sampled and flattened along with the real ones.

#define DO_IF(N,STMT) if ( (N) < (NUMBER_OF_SLICES) ) { STMT; }

// ============================================================================

// Statements that are only wanted when debugging are wrapped in the DEBUG
// macro.  If the symbol ENABLE_DEBUGGING has been defined above then  the
// wrapped statement is emitted followed by a semicolon, otherwise the macro
// expands to nothing and the statement is absent from the compiled shader.

#ifndef ENABLE_DEBUGGING
   #define DEBUG(code)       // Debugging disabled - discard the code.
#else
   #define DEBUG(code) code; // Debugging enabled - retain the code.
#endif

// ============================================================================
// == The shader's major function =============================================
// ============================================================================

void main ()
 {

   // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 

   // Step 1 - Initialise.

   // Scale from screen coordinates to texture coordinates that each have the
   // range 0.0 to 1.0. At the same time correct the up-down (not left-right)
   // inversion that occurs for an iStripper clip.  The  left-right inversion
   // is not corrected here as scene creators are quite used to having to fix
   // that in the .scn file however they are not so used to having to fix the
   // up-down inversion which is also a little harder to fix in a scene file.

   vec2 uv = gl_FragCoord.xy / u_WindowSize;
   uv.y = 1.0 - uv.y; // Get the clip the right way up!

   // Get the slice offset scale factor. This is the amount by which a slice
   // is shifted relative to its neighbours under the assumption that all of
   // the slices are equally thick or more generally it is the average shift
   // relative to the neighbouring slices when the slice thickness can vary.

   vec2 ds = Max_Parallax / float(NUMBER_OF_SLICES);

   // The array Slice will hold the individual slices as they  are  generated
   // and the results of manipulating them.  Statements for slices that don't
   // exist are suppressed by the DO_IF macro but the compiler still  has  to
   // compile the array references that they contain and is likely to produce
   // warning or error messages for any constant index that lies outside  the
   // array. A simple solution would be to dimension the array for the maximum
   // number of slices but that would be wasteful so instead a  single  extra
   // element is added and the S macro maps every index that is not less than
   // NUMBER_OF_SLICES to that extra element.  Like DO_IF it may look as if
   // this entails extra code being executed but it only uses constant values
   // and should be optimised away by the compiler.

   vec4 Slice[NUMBER_OF_SLICES+1];
   Slice[NUMBER_OF_SLICES] = vec4(0.0);
   #define S(n) Slice [ ((n)<(NUMBER_OF_SLICES)) ? (n) : (NUMBER_OF_SLICES) ]

   // The bottom "slice" is handled slightly differently to the others and is
   // set here to directly sample the input image with no shift applied, with
   // no brightness threshold and no dependency on its opacity.  However, the
   // point at which the image is sampled and the slice shift direction  both
   // depend on which half of the window the current pixel lies in,  but with
   // the extra complication that naively dividing the window into two halves
   // results in the image being truncated when it extends into  the  "wrong"
   // half of the window. The code used here therefore determines whether the
   // image is to be sampled for the left or right member of the stereo image
   // pair by allowing each to intrude into the other's half of the window but
   // only where it intrudes onto fully transparent pixels. Visibly this will
   // look as if the intruding image lies behind what is being intruded on.

   { // Q is +1.0 in the lower half of the uv range and -1.0 in the upper
     // half, evaluated separately for the X and Y components.

     vec2 Q = 1.0 - 2.0*step(vec2(0.5),uv);

     vec2 Offset = Q * Stereo_Offset;

     vec2 uv_2 = fract ( uv - Offset ); // For the other image of the pair.
          uv   = fract ( uv + Offset ); // For this half's own image.

     ds *= Q;

     // The two alternatives can be looked up in parallel.

     S(0) = texture2D(Image,uv); vec4 S0_2 = texture2D(Image,uv_2);

     // If this half's own image is fully transparent here then  switch  to
     // the other image of the pair, and to its slice shift direction.

     if ( S(0).a <= 0.0 )
      { uv = uv_2;
        ds = -ds;
        S(0) = S0_2;
      }

   }

   // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 

   // Step 2 - For each slice: Sample the input image.

   // The following macro determines the shift to be used for the n'th slice.
   // The curve used here is a circle (or an ellipse after any scaling) which
   // approximates the cross section of a limb or a torso. The result is that
   // pixels near the edge of a body part will be shifted less than  those in
   // the middle.  Using a curve of this sort reduces artefacts in the output
   // from the shader - such as fringing and partially doubled images - which
   // are seen when a simple linear relationship is used. 

   #define N2(n) float ( (n) * (n) )
   #define N2MAX N2(NUMBER_OF_SLICES)
   #define SLICE_SHIFT(n) ( ( ( 1.0 - sqrt(1.0-N2(n)/N2MAX) ) * float(n) ) * ds )

   // The texture lookups are independent and therefore they can  execute  in
   // parallel with each other.  They sample the input image with the offsets
   // that are appropriate to each "slice".  Here the slices are all of equal
   // thickness while the offsets are non-linear with respect to slice number
   // but a non-uniform set of slice thicknesses could also be used. The S(0)
   // slice has already been sampled so we do not do it again.  The alpha  of
   // each sample is forced to be either 0.0 or 1.0.
   //
   // Every index from 1 to 63 must appear exactly once in the table below. A
   // missing index would leave that slice unsampled, and so undefined,  when
   // the shader is configured to use it.

   #define SAMPLE(n) \
     DO_IF ( n, S(n) = texture2D ( Image, uv + SLICE_SHIFT(n) ); \
             S(n).a = float ( S(n).a > 0.0 );                    \
           )

              SAMPLE( 1) SAMPLE( 2) SAMPLE( 3) SAMPLE( 4) SAMPLE( 5) SAMPLE( 6) SAMPLE( 7)
   SAMPLE( 8) SAMPLE( 9) SAMPLE(10) SAMPLE(11) SAMPLE(12) SAMPLE(13) SAMPLE(14) SAMPLE(15)
   SAMPLE(16) SAMPLE(17) SAMPLE(18) SAMPLE(19) SAMPLE(20) SAMPLE(21) SAMPLE(22) SAMPLE(23)
   SAMPLE(24) SAMPLE(25) SAMPLE(26) SAMPLE(27) SAMPLE(28) SAMPLE(29) SAMPLE(30) SAMPLE(31)
   SAMPLE(32) SAMPLE(33) SAMPLE(34) SAMPLE(35) SAMPLE(36) SAMPLE(37) SAMPLE(38) SAMPLE(39)
   SAMPLE(40) SAMPLE(41) SAMPLE(42) SAMPLE(43) SAMPLE(44) SAMPLE(45) SAMPLE(46) SAMPLE(47)
   SAMPLE(48) SAMPLE(49) SAMPLE(50) SAMPLE(51) SAMPLE(52) SAMPLE(53) SAMPLE(54) SAMPLE(55)
   SAMPLE(56) SAMPLE(57) SAMPLE(58) SAMPLE(59) SAMPLE(60) SAMPLE(61) SAMPLE(62) SAMPLE(63)

   // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 

   // Step 3 - Calculate D the "depth" parameter 

   // The image will be sliced according to its value of D, which is  used as
   // a measure of overall brightness and as a proxy for "depth" under the
   // shaky assumption that bright areas are nearer to the camera than darker
   // ones - but it works well enough for the iStripper clips. D is 1.0 for a
   // maximally bright white point, 0.0 for a maximally dark black point. You
   // can think of D as standing for Darkness or Depth, or for both though it
   // increases in the direction of increasing lightness.  D should be in the
   // range 0.0 to 1.0, and will be unless out of range colour components are
   // present in the original image - which has been known to happen when the
   // image is itself generated by a fragment shader.

   float D = dot ( S(0).rgb, vec3(0.3333333333333333) );

   // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 

   // Step 4 - For each slice: Determine if current pixel is part of it.

   // The SLICE macro is used to determine if D belongs to a particular slice
   // by comparing it with the lower bounds for that slice. This will include
   // everything above the slice but the way that the slices are now combined
   // (by the FLATTENING stage) means that this is not a problem. Not only is
   // it not a problem but it greatly reduces the visibility of the artefacts
   // that can result when a slice is shifted by more than a  pixel  relative
   // to its neighbouring slice which can open up gaps in the image when both
   // the upper and lower bound are used to select the pixels for the slice.

   #define SLICE_A(a) ( step(a,D)     )
   #define SLICE_B(b) ( 1.0-step(b,D) ) // Currently not used.

   #define LO_BOUND(n) ( float(n) / float(NUMBER_OF_SLICES) )
   #define HI_BOUND(n) LO_BOUND((n)+1) // Currently not used.

   #define SLICE(n) \
     DO_IF ( n, S(n) *= SLICE_A(LO_BOUND(n)) )

   // The following set of slicing operations can execute in parallel. As for
   // the SAMPLE table every index from 1 to 63 must appear exactly once.

             SLICE( 1) SLICE( 2) SLICE( 3) SLICE( 4) SLICE( 5) SLICE( 6) SLICE( 7)
   SLICE( 8) SLICE( 9) SLICE(10) SLICE(11) SLICE(12) SLICE(13) SLICE(14) SLICE(15)
   SLICE(16) SLICE(17) SLICE(18) SLICE(19) SLICE(20) SLICE(21) SLICE(22) SLICE(23)
   SLICE(24) SLICE(25) SLICE(26) SLICE(27) SLICE(28) SLICE(29) SLICE(30) SLICE(31)
   SLICE(32) SLICE(33) SLICE(34) SLICE(35) SLICE(36) SLICE(37) SLICE(38) SLICE(39)
   SLICE(40) SLICE(41) SLICE(42) SLICE(43) SLICE(44) SLICE(45) SLICE(46) SLICE(47)
   SLICE(48) SLICE(49) SLICE(50) SLICE(51) SLICE(52) SLICE(53) SLICE(54) SLICE(55)
   SLICE(56) SLICE(57) SLICE(58) SLICE(59) SLICE(60) SLICE(61) SLICE(62) SLICE(63)

   // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 

   // Step 4a - Optionally generate debug information

   // If debugging is enabled then a subset of the slices will be rendered in
   // a solid debug colour where the colour used encodes the slice index. The
   // debugging permits you to see what parts of the generated image are from
   // the particular selected set of slices. Debug colouration can be enabled
   // and disabled by manipulating debug.a, as is done here, as a function of
   // time to regularly switch between debugging and normal colouration.

   // Declare and initialise 'debug'.  If a value of vec4(0.0) is used as the
   // initial value here then the debugged slices are  interleaved  with  the
   // other slices which will be displayed with their normal colour but if it
   // is initialised to (0.0,0.0,0.0,1.0) only the debug slices are displayed
   // when debugging is enabled.

   DEBUG ( vec4 debug = vec4(0.0,0.0,0.0,1.0) ) // Used for non-debug slices.

   // Up to 7 debug slices are selected for debugging, they are evenly spaced
   // among the slices that the shader has been configured to use and each is
   // lowest member of a group of m_debug slices except for the highest which
   // will represent fewer slices when the number of slices is not  divisible
   // by n_debug. Of the colours used here the primary colours, i.e. red, green
   // and blue, are used for the deepest debug slices,  secondary colours for
   // less deep slices (when there are more than 3) and white for the topmost
   // debug slice if there are more than 6. The SET_DEBUG_COLOUR macro relies
   // on the opacity of a slice being either 0.0 or 1.0 in order to set debug
   // to be transparent black or to be the specified colour.
   //
   // n_debug is the smaller of 7 and NUMBER_OF_SLICES, m_debug is the larger
   // of 1 and NUMBER_OF_SLICES/n_debug.  They are written using the selection
   // operator rather than the min and max functions because integer versions
   // of those functions only exist from GLSL 1.30 onwards and the values are
   // used to form array indices, which must be integers. Both are enclosed in
   // parentheses so that they may safely be used as parts of expressions.

   #define n_debug ( ((NUMBER_OF_SLICES) < 7) ? (NUMBER_OF_SLICES) : 7 )
   #define m_debug ( ((NUMBER_OF_SLICES)/n_debug > 1) ? ((NUMBER_OF_SLICES)/n_debug) : 1 )

   #define SET_DEBUG_COLOUR(n,r,g,b) \
     DO_IF ( n, debug += vec4(r,g,b,1.0) * S(n).a; )

   // This set of debugging statements can execute in parallel.

   DEBUG ( SET_DEBUG_COLOUR ( 0*m_debug, 1.0, 0.0, 0.0 ) ) // red
   DEBUG ( SET_DEBUG_COLOUR ( 1*m_debug, 0.0, 1.0, 0.0 ) ) // green
   DEBUG ( SET_DEBUG_COLOUR ( 2*m_debug, 0.0, 0.0, 1.0 ) ) // blue
   DEBUG ( SET_DEBUG_COLOUR ( 3*m_debug, 1.0, 1.0, 0.0 ) ) // yellow
   DEBUG ( SET_DEBUG_COLOUR ( 4*m_debug, 1.0, 0.0, 1.0 ) ) // magenta
   DEBUG ( SET_DEBUG_COLOUR ( 5*m_debug, 0.0, 1.0, 1.0 ) ) // cyan
   DEBUG ( SET_DEBUG_COLOUR ( 6*m_debug, 1.0, 1.0, 1.0 ) ) // white

   // Optionally cyclically enable and disable the debug colouration. You can
   // change the time multiplier to change the period or change the threshold
   // to change the fraction of time within each cycle for which debugging is
   // enabled.  The multiplier and the threshold values should both be in the
   // range 0.0 to 1.0 for this to work properly.

   DEBUG ( debug.a *= step ( 0.5, fract(u_Elapsed*0.2) ) ) 

   // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 

   // Step 5 - Flatten the stack of slices.

   // The following statements execute in parallel within each block but each
   // block must wait for the previous block to provide the required  inputs. 
   // They flatten the stack by successively replacing a slice with a  higher
   // slice when that higher slice is not transparent.  The result eventually
   // replaces the bottom slice, S(0). This is a common pattern when reducing
   // multiple results to a single value. Note, that the number of operations
   // run in parallel at any stage will be less than appear to be coded  here
   // as only those for which the DO_IF macro accepts the index of the higher
   // slice, n+k, really exist. This in turn means that the number of  stages
   // will also be reduced.  For example, if the number of slices is 16 then
   // stage 1 will only perform 8 calculations, stage 2 only 4,  stage 3 only
   // 2 and stage 4 only 1, with the later stages being suppressed  wherever
   // DO_IF rejects the index n+k.

   #define FLATTEN(n,k) \
      DO_IF ( n+k, S(n) = mix ( S(n), S(n+k), float(S(n+k).a>0.0) ) )

   FLATTEN(62,1) FLATTEN(60,1) FLATTEN(58,1) FLATTEN(56,1) // Stage 1
   FLATTEN(54,1) FLATTEN(52,1) FLATTEN(50,1) FLATTEN(48,1)
   FLATTEN(46,1) FLATTEN(44,1) FLATTEN(42,1) FLATTEN(40,1)
   FLATTEN(38,1) FLATTEN(36,1) FLATTEN(34,1) FLATTEN(32,1)
   FLATTEN(30,1) FLATTEN(28,1) FLATTEN(26,1) FLATTEN(24,1)
   FLATTEN(22,1) FLATTEN(20,1) FLATTEN(18,1) FLATTEN(16,1)
   FLATTEN(14,1) FLATTEN(12,1) FLATTEN(10,1) FLATTEN( 8,1)
   FLATTEN( 6,1) FLATTEN( 4,1) FLATTEN( 2,1) FLATTEN( 0,1)

   FLATTEN(60,2) FLATTEN(56,2) FLATTEN(52,2) FLATTEN(48,2) // Stage 2
   FLATTEN(44,2) FLATTEN(40,2) FLATTEN(36,2) FLATTEN(32,2)
   FLATTEN(28,2) FLATTEN(24,2) FLATTEN(20,2) FLATTEN(16,2)
   FLATTEN(12,2) FLATTEN( 8,2) FLATTEN( 4,2) FLATTEN( 0,2)

   FLATTEN(56,4) FLATTEN(48,4) FLATTEN(40,4) FLATTEN(32,4) // Stage 3
   FLATTEN(24,4) FLATTEN(16,4) FLATTEN( 8,4) FLATTEN( 0,4)

   FLATTEN(48,8) FLATTEN(32,8) FLATTEN(16,8) FLATTEN(0,8)  // Stage 4

   FLATTEN(32,16) FLATTEN(0,16)                            // Stage 5

   FLATTEN(0,32)                                           // Stage 6

   // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 

   // Step 6 - Output the result of the slicing and flattening.

   // Output the final value for the pixel.  Background  transparent  areas
   // are replaced by the colour specified by BG_rgba (which will  have  no
   // effect unless the default has been overridden). The multiplication by
   // gl_Color means that any color: or opacity: clauses used in  the  .scn
   // file when invoking this shader will be effective.  If  no  color:  or
   // opacity: clauses are used gl_Color is (1,1,1,1) and thus will have no
   // effect on the output.

   gl_FragColor = mix(BG_rgba,S(0),float(S(0).a>0.0)) * gl_Color;

   // If debugging is enabled output any debug colouration.  Pixels at which
   // the flattened result is fully transparent are given a fixed background
   // colour so that they can be distinguished from the debugged slices.

   DEBUG ( if ( debug.a > 0.5 ) { gl_FragColor = debug; } )
   DEBUG ( if ( S(0).a <= 0.0 ) gl_FragColor = vec4(0.1,0.2,0.2,0.5); )

   // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 

 }